********************************************************************************************************************************************************************
**************** Constructing Dataset From Original Fixed-length Data ****************************** 
********************************************************************************************************************************************************************

* ---- Project locations, kept in globals so that every later section can use them ----
global Working_Directory "\\Mac\Dropbox\DATA_JFIES\PRO_IV_Child_Benefit\REStat\Japanese_Child_Benefit\"
* Raw fixed-length survey files
global Data_Directory "$Working_Directory\ORG_JFIES"
* Dictionary (.dct) files that describe the layout of the raw files
global Dictionaries "$Working_Directory\Dictionary"

* ---- Session settings: no paging of output, empty memory, work inside the project folder ----
set more off
clear
cd "$Working_Directory"

********************************************************************************************************************************************************************
********************************************************************************************************************************************************************
***** Loading the ASCII file and Constructing Variables
********************************************************************************************************************************************************************
********************************************************************************************************************************************************************
* Seed file: an empty dataset (zero observations) that the monthly extracts are appended to.
* temp is only a placeholder variable; it is dropped again after each append further below.
generate temp=1
compress
saveold "Japanese_Child_Benefit.dta", replace
clear

forvalues YYYY=1991/2010 {
	* Year suffix used in the raw file names: 91-99 for 1991-1999, 0-9 for 2000-2009, 10 for 2010
	local YY=cond(`YYYY'>=2000, `YYYY'-2000, `YYYY'-1900)
	* Years 1990-1992 are read in a single pass per year, all later years month by month
	local annual=inrange(`YYYY',1990,1992)
	local year_data=cond(`annual',1,12)
	forvalues MM=1/`year_data' {
		clear
		* Year suffix followed by the two-digit (zero padded) month
		local YYMM="`YY'"+string(`MM',"%02.0f")

		* Pick the dictionary and the raw file that belong to this year (and month)
		if `annual' {
			* NOTE(review): this file name contains the literal letters MM, not the loop month -- confirm against the actual file names
			local dct "$Dictionaries\JFIES_Purpose_1990-1992.dct"
			local raw "$Data_Directory\W2_`YYYY'_RCD_Y-`YY'MM_A.txt"
		}
		else if (`YYYY'==1993) {
			local dct "$Dictionaries\JFIES_Purpose_1993.dct"
			local raw "$Data_Directory\W2_`YYYY'_RCD_Y-`YYMM'_A.txt"
		}
		else if (`YYYY'==1994) {
			local dct "$Dictionaries\JFIES_Purpose_1994.dct"
			local raw "$Data_Directory\W2_`YYYY'_RCD_Y-`YYMM'_A.txt"
		}
		else if (`YYYY'>=1995&`YYYY'<=1998)|(`YYYY'==1999&`MM'<=6) {
			local dct "$Dictionaries\JFIES_Purpose_1995-1999_Jun.dct"
			local raw "$Data_Directory\W2_`YYYY'_RCD_Y-`YYMM'_A.txt"
		}
		else if (`YYYY'==1999&`MM'>=7) {
			local dct "$Dictionaries\JFIES_Purpose_1999_Jul.dct"
			local raw "$Data_Directory\W2_`YYYY'_RCD_Y-`YYMM'_A.txt"
		}
		else if (`YYYY'>=2000&`YYYY'<=2001) {
			* From 2000 to 2009 the file name carries an extra leading zero before the year suffix
			local dct "$Dictionaries\JFIES_Purpose_2000-2001.dct"
			local raw "$Data_Directory\W2_`YYYY'_RCD_ZY-0`YYMM'_A.txt"
		}
		else if (`YYYY'>=2002&`YYYY'<=2004) {
			local dct "$Dictionaries\JFIES_Purpose_2002-2004.dct"
			local raw "$Data_Directory\W2_`YYYY'_RCD_ZY-0`YYMM'_A.txt"
		}
		else if (`YYYY'>=2005&`YYYY'<=2009) {
			local dct "$Dictionaries\JFIES_Purpose_2005-2009.dct"
			local raw "$Data_Directory\W2_`YYYY'_RCD_ZY-0`YYMM'_A.txt"
		}
		else if (`YYYY'==2010) {
			local dct "$Dictionaries\JFIES_Purpose_2010.dct"
			local raw "$Data_Directory\W2_`YYYY'_RCD_ZY-`YYMM'_A.txt"
		}

		* Read the fixed-length records through the selected dictionary
		qui: infile using "`dct'", using("`raw'")

		qui: compress

		**************************************************************
		* Spouse summary variables; they are filled from the household member records below
		gen spouse=0
		foreach v in age_spouse job_spouse workornot_spouse {
			gen `v'=.
		}

		****** In the Household Questionnaire, the order of household members is decided by the household
		****** For our convenience, we rearrange the data so that the children are sorted in order of age
		****** child_i: the age of i-th child (if i-th child is not there, child_i=-1)
		****** child_s_i: the school which i-th child goes to (0:no school 1:pre-school 2:elementary school 3:junior high 4:highschool)
		* All ten age slots are created first, then all ten school slots, each starting at -1 (slot not used)
		forvalues k=1/10 {
			gen child_`k'=-1
		}
		forvalues k=1/10 {
			gen child_s_`k'=-1
		}

		* Counters that are accumulated over the household members below
		gen eligible_18=0
		cap drop number_member_65
		gen number_member_65=0


		* Walk through household members 2..18 and collect
		*   - spouse information (relation code 2 or 3),
		*   - the count of children of the head aged under 18, or aged 18 with school type 4,
		*   - the count of members aged 65 or over,
		*   - the children of the head (relation code 4), kept sorted from oldest to youngest
		*     in child_1..child_10 with their school type in child_s_1..child_s_10.
		forvalues NM=2/18{
			* Conditions reused below; the macros expand to plain expressions
			local is_spouse "(hm_`NM'_relation==2|hm_`NM'_relation==3)"
			local is_child "hm_`NM'_relation==4&hm_`NM'_age!=."

			replace spouse=spouse+1 if `is_spouse'
			replace age_spouse= hm_`NM'_age if `is_spouse'
			replace job_spouse= hm_`NM'_job if `is_spouse'
			replace workornot_spouse= hm_`NM'_workornot if `is_spouse'

			* A missing school type is treated as 0 (no school)
			replace hm_`NM'_school_type=0 if hm_`NM'_school_type==.
			replace eligible_18    =  eligible_18 +((hm_`NM'_age<18)|(hm_`NM'_age==18&hm_`NM'_school_type==4)) if `is_child'
			replace number_member_65=number_member_65+(hm_`NM'_age>=65&hm_`NM'_age!=.)

			****** Insertion into the age-sorted child slots.
			****** For each candidate slot (birth_order2 = 10, 9, ..., 1) the member is inserted there if the member is
			****** a child of the head, at least as old as the current occupant of that slot and younger than the
			****** occupant of the slot before it (no upper bound for slot 1). Younger children are shifted down one slot first.
			****** The loop used to start at m=2, which made slot 9 the last possible insertion point: a child younger than
			****** nine children already stored was never recorded. Starting at m=1 makes slot 10 an insertion point as well
			****** (for m=1 the shifting loop below runs zero times).
			forvalues m = 1/10 {
				local birth_order=10-`m'
				local birth_order2=`birth_order'+1

				* Condition under which this member belongs into slot birth_order2
				local fits "`is_child'&hm_`NM'_age>=child_`birth_order2'"
				if `birth_order'>0 {
					local fits "`fits'&hm_`NM'_age<child_`birth_order'"
				}

				* Shift slots birth_order2..9 down by one, starting from the bottom (slot 10 is overwritten)
				forvalues n = `m'(-1)2 {
					local older2 = `birth_order' + `n'
					local older1 = `older2'-1
					replace child_`older2'=child_`older1' if `fits'
					replace child_s_`older2'=child_s_`older1' if `fits'
				}

				* Store the member in the freed slot
				replace child_`birth_order2'=hm_`NM'_age if `fits'
				replace child_s_`birth_order2'=hm_`NM'_school_type if `fits'
			}


		}
		****************************************************************************************
		**** Until 2000, "no job" is coded as "0" but "10" afterwards: recode 0 to 10 for head and spouse
		replace job=10 if job==0
		replace job_spouse=10 if job_spouse==0

		* other_ssb is created as missing when the dictionary for this period did not provide it
		cap gen other_ssb=.

		* nd: sum of the expenditure categories listed below
		gen nd= food ///
			+fuel ///
			+domestic_utencils ///
			+domestic_nondurable ///
			+hh_related_service ///
			+clothing_and_footwear ///
			+public_trans ///
			+automotive_maintenance ///
			+communications ///
			+recreation_goods ///
			+readings ///
			+recreation_service ///
			+miscellaneous ///
			+pocket_money ///
			+social_expenses

		* Variable lists; only these variables and the child slots are kept
		global base "year month city_code unit_code household_code serial_number house_ownership yearly_income number_member household_type inflation_factor eligible_18 number_member_65"
		global head "sex age firm_size job age_spouse job_spouse workornot_spouse"
		global income "total_income bonus social_security other_ssb"
		global cons "nd"
		keep $base $head $income $cons child_*

				
		****  Sample Selection  ******************************************************************************
		**********************************************************************************************************
		***** Use those who have at least one child; the oldest child should be aged under 15
		drop if child_1>=15|child_1==-1

		***** Use employees only, for whom monthly income is available
		***** (a missing job code is also larger than 4 and is therefore dropped)
		drop if job>4

		***** Use male head households only (single mothers may receive other subsidies)
		drop if sex==2|household_type==2

		***** Household head and his wife should be aged 55 or under
		***** NOTE(review): a missing age_spouse compares as larger than 55, so households with a
		***** missing age_spouse are dropped here as well -- confirm that this is intended
		drop if age>55|age_spouse>55

		***** Excluding misreporting (or unusual) households: age and school type of a child do not fit together
		forvalues i=1/10 {
			** Highschool student under age 15
			drop if child_`i'<15&child_s_`i'==4
			** Junior highschool student under age 12
			drop if child_`i'<12&child_s_`i'==3
			** Elementary school student under age 6
			drop if child_`i'<6&child_s_`i'==2
			** High-, junior high, elementary, and pre-school student over age 18
			drop if child_`i'>18&child_s_`i'<5&child_s_`i'>0
			** Junior high, elementary, and pre-school student over age 15
			drop if child_`i'>15&child_s_`i'<4&child_s_`i'>0
			** Elementary and pre-school student over age 12
			drop if child_`i'>12&child_s_`i'<3&child_s_`i'>0
		}

		***** Age difference between parents and the oldest child is too small
		drop if (age-child_1)<15
		drop if (age_spouse-child_1)<15
		**********************************************************************************************************
		**********************************************************************************************************

		* Add the months accumulated so far, remove the placeholder variable of the seed file, and save again
		append using "Japanese_Child_Benefit.dta"
		capture drop temp
		compress
		saveold "Japanese_Child_Benefit.dta", replace
	}
}



********************************************************************************************************************************************************************
********************************************************************************************************************************************************************
******* Constructing variables and some more sample selections 
********************************************************************************************************************************************************************
********************************************************************************************************************************************************************
use "Japanese_Child_Benefit.dta", clear

* Monthly time index: number of months elapsed since January 1960
gen period=(year-1960)*12+month-1

* Attach the CPI file and the calendar file by period (old-style merge, period unique in the using file).
* Rows without a city_code are removed after each merge -- presumably periods that exist only in the
* using file; confirm.
foreach aux in cpi calendar {
	sort period
	merge period using "`aux'.dta", uniqus
	drop _merge
	drop if city_code==.
}


**********************************************
*** Constructing Household unique ID
**********************************************
* NOTE(review): this do-file is expected to create the variable id used by tsset below -- confirm
do "Make_Sub_ID.do"
*********************************************

* Declare the panel structure: household id by monthly period
tsset id period

************************************************************************************************
****** Constructing Variables **************
************************************************************************************************
***** Yearly income surveyed in the last interview is substituted into the earlier interviews,
***** otherwise more imputed values would have to be used
tempvar last_wave last_income
* Highest interview number observed for the household
egen `last_wave'=max(interview), by(id)
* Yearly income as reported in that interview (the product is zero in all other interviews)
egen `last_income'=max(yearly_income*(interview==`last_wave')), by(id)
* NOTE(review): the factor 10 looks like a unit conversion -- confirm the unit of yearly_income
replace yearly_income=`last_income'*10
drop `last_wave' `last_income'

*******************************************************************************************************
********** Calculating the amount of child benefit *************
*******************************************************************************************************
do "Make_Sub_Calculating_Benefit.do"
*******************************************************************************************************
*******************************************************************************************************

***** Consumption: converted to a daily basis and rescaled to a monthly figure of 365/12 days, in 1,000 yen
***** (num_day is presumably the number of days in the month, taken from the calendar file -- confirm)
global cons "nd"
foreach X in $cons {
	replace `X'=365*(`X'/num_day)/12000
}

***** Monthly income is expressed in 1,000 yen
global income "total_income bonus social_security other_ssb"
foreach X in $income {
	replace `X'=`X'/1000
}

******** Deflated by the CPI and rounded to integers (other_ssb is not deflated here)
foreach v in x_pay social_security total_income bonus nd {
	replace `v'=round(100*`v'/cpi,1)
}
gen real_yy=round(100*yearly_income/cpi,1)

****** Total income excluding bonuses
gen total_income2=total_income-bonus

compress


*******************************************************************************************************
********** Additional Sample Selection  **************************************************************************
*******************************************************************************************************
***** We only use households whose head is unchanged over the panel
tempvar head_changed ever_changed
* Flag a month in which the age of the head falls or rises by more than one, or the sex of the head differs
* from the previous month (only where the previous month is observed)
gen `head_changed'=((D.age<0|D.age>1)&L.age!=.)|(sex!=L.sex&L.sex!=.)
* A household is removed completely as soon as one of its months is flagged
egen `ever_changed'=max(`head_changed'), by(id)
drop if `ever_changed'>0
drop `head_changed' `ever_changed'

* Keep the years 1992 to 2009
drop if year<1992|year>=2010
* NOTE(review): test_y and line are not created in this file; presumably they come from
* Make_Sub_Calculating_Benefit.do -- confirm their meaning there
drop if test_y>line

*******************************************************************************************************
********** Regression  **************************************************************************
*******************************************************************************************************

****** Set up month, year and interview dummies and the squared age of the head
tab month, gen(md)
tab year, gen(yeard)
tab interview, gen(interviewd)
gen age2=age^2

cap log close
log using "./Results_RESTAT_Final.txt", text replace

* Controls shared by every specification below
local ctrl "yeard* md* interviewd* D.number_member age age2"

* Each column reports three regressions of the change in nondurable consumption:
*   (a) on the change in income excluding bonuses,
*   (b) the same with the change in x_pay as instrument (first stage shown),
*   (c) directly on the change in x_pay.
* All are weighted and clustered by household id.

********** Column (1) Impact on Nondurable consumption *******************************
ivreg2 D.nd `ctrl' D.total_income2 [w=inflation_factor], cluster(id)
ivreg2 D.nd `ctrl' (D.total_income2=D.x_pay) [w=inflation_factor], cluster(id) first
ivreg2 D.nd `ctrl' D.x_pay [w=inflation_factor], cluster(id)


********** Separate Regression below and above median income ***************
* Median of real yearly income over all observations in memory
egen median_y=median(real_yy)

********** Column (2) Below the median ******
ivreg2 D.nd `ctrl' D.total_income2 [w=inflation_factor] if real_yy<=median_y, cluster(id)
ivreg2 D.nd `ctrl' (D.total_income2=D.x_pay) [w=inflation_factor] if real_yy<=median_y, cluster(id) first
ivreg2 D.nd `ctrl' D.x_pay [w=inflation_factor] if real_yy<=median_y, cluster(id)

********** Column (3) Above the median ******
ivreg2 D.nd `ctrl' D.total_income2 [w=inflation_factor] if real_yy>median_y, cluster(id)
ivreg2 D.nd `ctrl' (D.total_income2=D.x_pay) [w=inflation_factor] if real_yy>median_y, cluster(id) first
ivreg2 D.nd `ctrl' D.x_pay [w=inflation_factor] if real_yy>median_y, cluster(id)


********** Column (4) Correctly Reporting Household Only *****************************

***** Households are regarded as a correct reporter if the change in the imputed amount of child benefit
***** is equal to the change in the self-reported one
gen correct_reporter =(D.x_pay!=.&D.social_security==D.x_pay)

ivreg2 D.nd `ctrl' D.total_income2 [w=inflation_factor] if correct_reporter, cluster(id)
ivreg2 D.nd `ctrl' (D.total_income2 = D.x_pay) [w=inflation_factor] if correct_reporter, cluster(id) first
ivreg2 D.nd `ctrl' D.x_pay [w=inflation_factor] if correct_reporter, cluster(id)

********** Column (5) Inverse Probability Weighting *****************************

* Probit for being a correct reporter; the linear index is turned into a probability
probit correct_reporter `ctrl'
predict fitted, xb
gen prob_correct_reporter=normprob(fitted)
su prob_correct_reporter

* Original weight divided by the predicted probability of being a correct reporter
gen inflation_factor2=inflation_factor/prob_correct_reporter

ivreg2 D.nd `ctrl' D.total_income2 [w=inflation_factor2] if correct_reporter, cluster(id)
ivreg2 D.nd `ctrl' (D.total_income2=D.x_pay) [w=inflation_factor2] if correct_reporter, cluster(id) first
ivreg2 D.nd `ctrl' D.x_pay [w=inflation_factor2] if correct_reporter, cluster(id)

log close
